HW 02

Author

Meredith Jean-Baptiste

Published

June 13, 2025

1 - A new day, a new plot, a new geom

# Setup: make sure pacman is installed before it is used to load packages
if (!require("pacman")) {
  install.packages("pacman")
}
Loading required package: pacman
# Load (installing when missing) every package used in this section
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr, tidyr, forcats
)

# Default ggplot2 theme for all figures
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Keep printed output narrow
options(width = 65)

# Figure defaults applied to every knitr chunk
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

# Install dsbox from GitHub only when it is missing, so that rendering the
# document does not contact GitHub (or need devtools) on every run.
if (!requireNamespace("dsbox", quietly = TRUE)) {
  devtools::install_github("tidyverse/dsbox")
}
Skipping install of 'dsbox' from a github remote, the SHA1 (244ecdfe) has not changed since last install.
  Use `force = TRUE` to force installation
# Attach dsbox (presumably the package that supplies the `edibnb` data used below)
library(dsbox)
# Keep a copy of the Edinburgh Airbnb data under a separate name.
# NOTE(review): `my_edibnb_data` is not referenced again in the code shown here.
my_edibnb_data <- edibnb

#make a ridge plot of AirBnB review scores of Edinburgh neighborhoods, ordered by their median review scores
#stat_summary() function to summarize your data. To order the data by mean, you can use the fct_reorder() function from the forcats package to reorder the factor levels based on the calculated means. 
#Note: this did not work for me for some reason
#edibnb %>% 
#  group_by(neighbourhood) %>%
#  summarize(mean_score = mean(review_scores_rating), .groups = "drop") %>% 
#  mutate(neighbourhood = fct_reorder(neighbourhood, mean_score)) %>% 
#  geom_density_ridges(review_scores_rating ~ fct_reorder(neighbourhood, review_scores_rating))+

#I also tried this and did not have any luck with finding the medians
#df1 <- edibnb %>%  group_by(neighbourhood) %>% mutate(med = median(review_scores_rating))

# Ridge plot of Airbnb review scores for each Edinburgh neighbourhood,
# ordered by the neighbourhood's median review score.
# Listings without a rating are removed first: the median used for the
# ordering is then defined for every neighbourhood, and the density stat has
# no missing values to discard.
edibnb %>%
  filter(!is.na(review_scores_rating)) %>%
  mutate(
    # fct_reorder() sorts the factor levels by the median rating per level
    neighbourhood = fct_reorder(
      neighbourhood,
      review_scores_rating,
      .fun = median
    )
  ) %>%
  ggplot(aes(x = review_scores_rating, y = neighbourhood)) +
  geom_density_ridges() +
  labs(
    x = "Review score ratings",
    y = "Neighbourhood",
    title = "AirBNB review score ratings by Edinburgh neighbourhoods",
    caption = "Source: TidyTuesday"
  )
Picking joint bandwidth of 1.21
Warning: Removed 2177 rows containing non-finite outside the scale range
(`stat_density_ridges()`).

#This ridge plot (geom_density_ridges) shows the Airbnb review score ratings by Edinburgh neighbourhood. The median review scores appear to be quite high across all neighbourhoods. Unfortunately, I had difficulty showing the medians in this graph, so the full distribution of scores is shown for every neighbourhood. The distributions suggest that the medians are slightly lower for Tollcross, Old Town and Haymarket; the remaining neighbourhoods all have very good scores. Listings that did not specify a neighbourhood are grouped under NA, and their review scores are high as well.

2 - Foreign Connected PACs

#| label: SETUP
# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr, tidyr
)
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

# Collect every file under data/ whose name matches "Foreign Connected PAC"
list_of_files <- fs::dir_ls("data", regexp = "Foreign Connected PAC")
# Read all matching files into a single tibble; the path of the file each
# row came from is stored in a column called `year`
pac <- readr::read_csv(file = list_of_files, id = "year")
Rows: 2394 Columns: 6
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Compa...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# clean the names and convert character amounts to numeric
# Preview the data with snake_case column names.
# NOTE(review): the result is only printed, not assigned back to `pac`, so
# the original column names (e.g. `Dems`, `Repubs`) are still the ones used
# by the code that follows.
pac %>% 
   janitor::clean_names()
# A tibble: 2,394 × 6
   year     pac_name_affiliate country_of_origin_pa…¹ total dems 
   <chr>    <chr>              <chr>                  <chr> <chr>
 1 data/Fo… 7-Eleven           Japan/Ito-Yokado       $8500 $1500
 2 data/Fo… ABB Group          Switzerland/Asea Brow… $460… $170…
 3 data/Fo… Accenture          UK/Accenture plc       $759… $230…
 4 data/Fo… ACE INA            UK/ACE Group           $385… $125…
 5 data/Fo… Acuson Corp (Siem… Germany/Siemens AG     $2000 $2000
 6 data/Fo… Adtranz (DaimlerC… Germany/DaimlerChrysl… $105… $100…
 7 data/Fo… AE Staley Manufac… UK/Tate & Lyle         $240… $100…
 8 data/Fo… AEGON USA (AEGON … Netherlands/Aegon NV   $582… $105…
 9 data/Fo… AIM Management Gr… UK/AMVESCAP            $250… $100…
10 data/Fo… Air Liquide Ameri… France/L'Air Liquide … $0    $0   
# ℹ 2,384 more rows
# ℹ abbreviated name: ¹​country_of_origin_parent_company
# ℹ 1 more variable: repubs <chr>
# Data wrangling: reduce the raw PAC data to the columns the plot needs,
# for UK-connected PACs only.
#   Party_D / Party_R : contribution amounts with the "$" sign stripped,
#                       converted from character to numeric
#   country           : the text before the first "/" in the
#                       country-of-origin / parent-company column
#   year              : the part of the source file path that lies after the
#                       first "-" and before the first "."
#                       (presumably the closing year of the election cycle;
#                       confirm against the file names)
# Note: another way to format a date into just the year:
# pac$year = format(pac$year, "%Y")
pac <- pac %>%
  mutate(
    Party_D = as.numeric(gsub("$", "", Dems, fixed = TRUE)),
    Party_R = as.numeric(gsub("$", "", Repubs, fixed = TRUE)),
    country = str_split_fixed(`Country of Origin/Parent Company`, "/", 2)[, 1],
    year = year %>%
      str_remove("[.].*$") %>%   # drop everything from the first "."
      str_remove("^[^-]*-") %>%  # drop everything up to the first "-"
      as.numeric()
  ) %>%
  filter(country == "UK") %>%
  # select and order the columns
  select(country, year, Party_D, Party_R)

#add the totals per year (did not work)
#pac %>% 
 # tapply(pac$Party_D, pac$year, function = sum)+
  #tapply(pac$Party_R, pac$year, function = sum)
#this didnt work either
#pac <- pac %>% 
#  group_by(year) %>% 
#  summarise(Party_D)+
#  summarise(Party_R)

#neither did this
#summarizing the amount per year
 #group_by(year) %>% 
  #summarise(amount = sum(amount))%>% 

#pivot long did not work at first , now it does
pac <- pac %>% 
  pivot_longer(cols = starts_with("Party"),
               names_to = "party",
               values_to = "amount",
               values_drop_na = TRUE)
pac <- pac %>% 
  group_by(year, party, .drop = TRUE) %>% 
  summarize(total_contributions = sum(amount))
`summarise()` has grouped output by 'year'. You can override
using the `.groups` argument.
# Line chart of total contributions per year, one line per party.
# A single geom_line() layer is used (the line was previously drawn twice),
# and colours/labels are matched to the party codes by name rather than by
# the order of the factor levels.
pac %>%
  ggplot(aes(x = year, y = total_contributions, color = party)) +
  geom_line(linewidth = 1) +
  theme_classic() +
  scale_color_manual(
    values = c(Party_D = "blue", Party_R = "red"),
    labels = c(Party_D = "Democrat", Party_R = "Republican")
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 3000000, by = 1000000),
    labels = label_comma()  # thousands separator on the axis labels
  ) +
  labs(
    y = "Total amount (USD)",
    x = "Year",
    title = "Contributions to US political parties from UK-connected PACs",
    caption = "OpenSecrets.org"
  )

#view(pac)
#glimpse(pac)

2b - Foreign Connected PACs

#| label: SETUP
# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr, tidyr
)
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
options(width = 65)
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

# Collect every file under data/ whose name matches "Foreign Connected PAC"
list_of_files <- fs::dir_ls("data", regexp = "Foreign Connected PAC")
# Read all matching files into a single tibble; the path of the file each
# row came from is stored in a column called `year`
pac <- readr::read_csv(file = list_of_files, id = "year")
Rows: 2394 Columns: 6
── Column specification ─────────────────────────────────────────
Delimiter: ","
chr (5): PAC Name (Affiliate), Country of Origin/Parent Compa...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# clean the names and convert character amounts to numeric
# Preview the data with snake_case column names.
# NOTE(review): the result is only printed, not assigned back to `pac`, so
# the original column names (e.g. `Dems`, `Repubs`) are still the ones used
# by the code that follows.
pac %>% 
   janitor::clean_names()
# A tibble: 2,394 × 6
   year     pac_name_affiliate country_of_origin_pa…¹ total dems 
   <chr>    <chr>              <chr>                  <chr> <chr>
 1 data/Fo… 7-Eleven           Japan/Ito-Yokado       $8500 $1500
 2 data/Fo… ABB Group          Switzerland/Asea Brow… $460… $170…
 3 data/Fo… Accenture          UK/Accenture plc       $759… $230…
 4 data/Fo… ACE INA            UK/ACE Group           $385… $125…
 5 data/Fo… Acuson Corp (Siem… Germany/Siemens AG     $2000 $2000
 6 data/Fo… Adtranz (DaimlerC… Germany/DaimlerChrysl… $105… $100…
 7 data/Fo… AE Staley Manufac… UK/Tate & Lyle         $240… $100…
 8 data/Fo… AEGON USA (AEGON … Netherlands/Aegon NV   $582… $105…
 9 data/Fo… AIM Management Gr… UK/AMVESCAP            $250… $100…
10 data/Fo… Air Liquide Ameri… France/L'Air Liquide … $0    $0   
# ℹ 2,384 more rows
# ℹ abbreviated name: ¹​country_of_origin_parent_company
# ℹ 1 more variable: repubs <chr>
# Data wrangling: reduce the raw PAC data to the columns the plot needs,
# for German-connected PACs only.
#   Party_D / Party_R : contribution amounts with the "$" sign stripped,
#                       converted from character to numeric
#   country           : the text before the first "/" in the
#                       country-of-origin / parent-company column
#   year              : the part of the source file path that lies after the
#                       first "-" and before the first "."
#                       (presumably the closing year of the election cycle;
#                       confirm against the file names)
# Note: another way to format a date into just the year:
# pac$year = format(pac$year, "%Y")
pac <- pac %>%
  mutate(
    Party_D = as.numeric(gsub("$", "", Dems, fixed = TRUE)),
    Party_R = as.numeric(gsub("$", "", Repubs, fixed = TRUE)),
    country = str_split_fixed(`Country of Origin/Parent Company`, "/", 2)[, 1],
    year = year %>%
      str_remove("[.].*$") %>%   # drop everything from the first "."
      str_remove("^[^-]*-") %>%  # drop everything up to the first "-"
      as.numeric()
  ) %>%
  filter(country == "Germany") %>%
  # select and order the columns
  select(country, year, Party_D, Party_R)

#pivot long did not work at first , now it does
# Reshape to one row per PAC, year and party, then total the contributions
# for each year/party combination. Rows with a missing amount are dropped
# while pivoting.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping lingers on
# `pac` and summarise() no longer prints its grouping message.
pac <- pac %>%
  pivot_longer(
    cols = starts_with("Party"),
    names_to = "party",
    values_to = "amount",
    values_drop_na = TRUE
  ) %>%
  group_by(year, party) %>%
  summarize(total_contributions = sum(amount), .groups = "drop")
`summarise()` has grouped output by 'year'. You can override
using the `.groups` argument.
# Line chart of total contributions per year, one line per party.
# A single geom_line() layer is used (the line was previously drawn twice),
# and colours/labels are matched to the party codes by name rather than by
# the order of the factor levels.
pac %>%
  ggplot(aes(x = year, y = total_contributions, color = party)) +
  geom_line(linewidth = 1) +
  theme_classic() +
  scale_color_manual(
    values = c(Party_D = "blue", Party_R = "red"),
    labels = c(Party_D = "Democrat", Party_R = "Republican")
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 3000000, by = 500000),
    labels = label_comma()  # thousands separator on the axis labels
  ) +
  labs(
    y = "Total amount (USD)",
    x = "Year",
    title = "Contributions to US political parties from German-connected PACs",
    caption = "OpenSecrets.org"
  )

#Interpretation: The German-connected PACs follow a pattern similar to the one we saw for the UK-connected PACs: the majority of the contributions went to the Republican party, and the amounts increased steadily over time, with the highest amounts between 2012 and 2017. The peak was around 2 million USD, compared with over 3 million USD for the UK-connected PACs.

3 - Median housing prices in the US

# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme for all figures
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Keep printed output narrow
options(width = 65)

# Figure defaults applied to every knitr chunk
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

# Read the median housing price data from the project's data folder
housing <- readr::read_csv(file = here::here("data", "median-housing.csv"))
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl  (1): MSPUS
date (1): DATE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#adjust the date
# NOTE(review): read_csv() already parsed DATE as a date (see the column
# specification printed above), so mdy() cannot parse it and returns NA for
# every row. The result is only printed, not assigned, so `housing` itself
# is left unchanged by this step.
housing %>% 
  mutate(date = mdy(DATE))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
   DATE       MSPUS date  
   <date>     <dbl> <date>
 1 1963-01-01 17800 NA    
 2 1963-04-01 18000 NA    
 3 1963-07-01 17900 NA    
 4 1963-10-01 18500 NA    
 5 1964-01-01 18500 NA    
 6 1964-04-01 18900 NA    
 7 1964-07-01 18900 NA    
 8 1964-10-01 19400 NA    
 9 1965-01-01 20200 NA    
10 1965-04-01 19800 NA    
# ℹ 224 more rows
# Give the columns descriptive lower-case names and derive the calendar year
# and month straight from the date column. lubridate's year()/month() replace
# the earlier string split on "-", which left `month` holding month-and-day
# text (e.g. "01-01") instead of a month.
housing <- housing %>%
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    year = year(date),   # numeric calendar year
    month = month(date)  # numeric month, 1-12
  )

# Define x-axis break interval as 10 years (did not work)
#year_min <- min(housing$year, na.rm = TRUE)
#year_max <- max(housing$year, na.rm = TRUE)

#If I put the x=date, the line is correct but the labels are all piled up/ OR the error message "Can't convert `x` <date> to <double>.".
#If I put x= year, the line is choppy but the labels are correct
#Finally resolved this issue using scale_x_date with breaks
# Line chart of the median sales price over the full series
housing %>%
  ggplot(aes(x = date, y = price)) +
  geom_line(color = "blue") +
  # Date axis: one tick every five years, labelled with the year only
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  # Price axis: a tick every 40,000 with a thousands separator
  scale_y_continuous(
    breaks = seq(from = 0, to = 440000, by = 40000),
    labels = label_comma()
  ) +
  labs(
    x = "",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = "Sources: Census;HUD"
  ) +
  theme_minimal() +
  # Left-align the title and subtitle
  theme(
    plot.title = element_text(hjust = 0),
    plot.subtitle = element_text(hjust = 0)
  )

#glimpse(housing)
#view(housing)

3b - Median housing prices in the US: RECESSION LINES

# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme for all figures
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Keep printed output narrow
options(width = 65)

# Figure defaults applied to every knitr chunk
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

# Read the median housing price data from the project's data folder
housing <- readr::read_csv(file = here::here("data", "median-housing.csv"))
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl  (1): MSPUS
date (1): DATE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#adjust the date
# NOTE(review): read_csv() already parsed DATE as a date (see the column
# specification printed above), so mdy() cannot parse it and returns NA for
# every row. The result is only printed, not assigned, so `housing` itself
# is left unchanged by this step.
housing %>% 
  mutate(date = mdy(DATE))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
   DATE       MSPUS date  
   <date>     <dbl> <date>
 1 1963-01-01 17800 NA    
 2 1963-04-01 18000 NA    
 3 1963-07-01 17900 NA    
 4 1963-10-01 18500 NA    
 5 1964-01-01 18500 NA    
 6 1964-04-01 18900 NA    
 7 1964-07-01 18900 NA    
 8 1964-10-01 19400 NA    
 9 1965-01-01 20200 NA    
10 1965-04-01 19800 NA    
# ℹ 224 more rows
# Give the columns descriptive lower-case names and derive the calendar year
# and month straight from the date column. lubridate's year()/month() replace
# the earlier string split on "-", which left `month` holding month-and-day
# text (e.g. "01-01") instead of a month.
housing <- housing %>%
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    year = year(date),   # numeric calendar year
    month = month(date)  # numeric month, 1-12
  )

#housing %>% 
#  mutate(recession = if_else(price>, "TRUE", "FALSE"))


# Define x-axis break interval as 10 years (did not work)
#year_min <- min(housing$date, na.rm = TRUE)
#year_max <- max(housing$date, na.rm = TRUE)

#If I put the x=date, the line is correct but the labels are all piled up.
#If I put x= year, the line is choppy but the labels are correct
# U.S. recessions from 1963 onward as peak month / trough month pairs
# (NBER business-cycle dates; verify against the NBER table before
# publishing). These drive the shaded bands promised by the caption.
recessions <- tibble(
  start = ymd(c(
    "1969-12-01", "1973-11-01", "1980-01-01", "1981-07-01",
    "1990-07-01", "2001-03-01", "2007-12-01", "2020-02-01"
  )),
  end = ymd(c(
    "1970-11-01", "1975-03-01", "1980-07-01", "1982-11-01",
    "1991-03-01", "2001-11-01", "2009-06-01", "2020-04-01"
  ))
)

# Median sales price over time with the recession periods shaded behind it
housing %>%
  ggplot(aes(x = date, y = price)) +
  # Bands are drawn first so the price line sits on top of them;
  # -Inf/Inf make each band span the full height of the panel
  geom_rect(
    data = recessions,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "grey80",
    alpha = 0.6
  ) +
  geom_line(color = "blue") +
  scale_x_date(date_breaks = "5 years", date_labels = "%Y") +
  scale_y_continuous(
    breaks = seq(from = 0, to = 440000, by = 40000),
    labels = label_comma()
  ) +
  labs(
    x = "",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = "Shaded areas indicate U.S. recessions \nSources: Census; HUD"
  ) +
  # Align title and subtitle with the left edge of the whole plot instead of
  # pushing them left with negative hjust values
  theme(plot.title.position = "plot")

3C - Subset of Median Housing 2019 - 2020

# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme for all figures
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Keep printed output narrow
options(width = 65)

# Figure defaults applied to every knitr chunk
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

# Read the median housing price data from the project's data folder
housing <- readr::read_csv(file = here::here("data", "median-housing.csv"))
Rows: 234 Columns: 2
── Column specification ─────────────────────────────────────────
Delimiter: ","
dbl  (1): MSPUS
date (1): DATE

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#adjust the date
# NOTE(review): read_csv() already parsed DATE as a date (see the column
# specification printed above), so mdy() cannot parse it and returns NA for
# every row. The result is only printed, not assigned, so `housing` itself
# is left unchanged by this step.
housing %>% 
  mutate(date = mdy(DATE))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `date = mdy(DATE)`.
Caused by warning:
! All formats failed to parse. No formats found.
# A tibble: 234 × 3
   DATE       MSPUS date  
   <date>     <dbl> <date>
 1 1963-01-01 17800 NA    
 2 1963-04-01 18000 NA    
 3 1963-07-01 17900 NA    
 4 1963-10-01 18500 NA    
 5 1964-01-01 18500 NA    
 6 1964-04-01 18900 NA    
 7 1964-07-01 18900 NA    
 8 1964-10-01 19400 NA    
 9 1965-01-01 20200 NA    
10 1965-04-01 19800 NA    
# ℹ 224 more rows
# Give the columns descriptive lower-case names and derive the calendar year
# and month straight from the date column. lubridate's year()/month() replace
# the earlier string split on "-", which left `month` holding month-and-day
# text (e.g. "01-01") instead of a month.
housing <- housing %>%
  rename(price = MSPUS, date = DATE) %>%
  mutate(
    year = year(date),   # numeric calendar year
    month = month(date)  # numeric month, 1-12
  )

# Median sales price for 2019 and 2020 only, one point per observation
housing %>%
  filter(date >= ymd("2019-01-01"), date <= ymd("2020-12-31")) %>%
  ggplot(aes(x = date, y = price)) +
  geom_line(color = "blue") +
  # Shape 21 is a circle that honours `fill`, so the white interior hides the
  # line behind each marker (shape 1 ignores `fill`)
  geom_point(color = "blue", fill = "white", shape = 21) +
  # One tick at the start of each quarter, labelled Q1-Q4. Explicit breaks
  # keep the ticks on quarter starts regardless of axis expansion.
  scale_x_date(
    breaks = seq(ymd("2019-01-01"), ymd("2020-10-01"), by = "3 months"),
    labels = function(d) paste0("Q", quarter(d))
  ) +
  scale_y_continuous(
    breaks = seq(from = 280000, to = 380000, by = 20000),
    labels = label_comma()
  ) +
  labs(
    x = "2019                                2020",
    y = "Dollars",
    title = "Median sales price of houses sold in the United States",
    subtitle = "Not seasonally adjusted",
    caption = ""
  ) +
  # Align title and subtitle with the left edge of the whole plot instead of
  # pushing them left with negative hjust values
  theme(plot.title.position = "plot")

4 - Expect More. Plot More.

# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme for all figures
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Keep printed output narrow
options(width = 65)

# Figure defaults applied to every knitr chunk
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)

library(ggplot2)

# Three bars of equal height, one per ring of the logo.
# Columns are named with `=`: using `<-` inside data.frame() produced a
# mangled column name (subject....c..A....B....C..) and left stray `subject`
# and `value` objects in the global environment.
df <- data.frame(
  subject = c("A", "B", "C"),
  value = c(100, 100, 100)
)

# Polar coordinates on y bend the bars around the centre; the fill
# alternates red / white / red from bar A to bar C.
ggplot(df, aes(x = subject, y = value, fill = subject)) +
  geom_col() +
  scale_fill_manual(values = c("red", "white", "red")) +
  scale_x_discrete(limits = c("A", "B", "C")) +
  coord_polar("y") +
  # removed labels and background
  theme_void() +
  labs(
    x = "",
    y = "",
    caption = "TARGET"
  ) +
  theme(
    legend.position = "none",
    plot.caption = element_text(
      color = "red", size = 36, face = "bold", hjust = 0.5
    )
  )

5 - Mirror, mirror on the wall, who’s the ugliest of them all?

# Setup: packages, default theme and figure options for this section
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(
  here, tidyverse, colorspace, palmerpenguins, fs, lubridate, scales,
  openintro, gghighlight, glue, ggridges, dplyr
)

# Default ggplot2 theme for all figures
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Keep printed output narrow
options(width = 65)

# Figure defaults applied to every knitr chunk
knitr::opts_chunk$set(
  dpi = 300,            # higher dpi, sharper image
  fig.align = "center", # center align figures
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.asp = 0.618,      # the golden ratio
  fig.width = 7         # 7" width
)
# Print the penguins data to inspect its columns
palmerpenguins::penguins
# A tibble: 344 × 8
   species island  bill_length_mm bill_depth_mm flipper_length_mm
   <fct>   <fct>            <dbl>         <dbl>             <int>
 1 Adelie  Torger…           39.1          18.7               181
 2 Adelie  Torger…           39.5          17.4               186
 3 Adelie  Torger…           40.3          18                 195
 4 Adelie  Torger…           NA            NA                  NA
 5 Adelie  Torger…           36.7          19.3               193
 6 Adelie  Torger…           39.3          20.6               190
 7 Adelie  Torger…           38.9          17.8               181
 8 Adelie  Torger…           39.2          19.6               195
 9 Adelie  Torger…           34.1          18.1               193
10 Adelie  Torger…           42            20.2               190
# ℹ 334 more rows
# ℹ 3 more variables: body_mass_g <int>, sex <fct>, year <int>
# Deliberately ugly plot of penguin body mass by species.
# - `size` is mapped on the point layer only, so geom_line() no longer
#   inherits it (that inheritance triggered the "size aesthetic for lines"
#   deprecation warning).
# - `na.rm = TRUE` was removed from aes(): it is not an aesthetic, and rows
#   with missing island or body mass are already dropped by drop_na().
# - janitor::clean_names() was removed: the penguins columns are already
#   snake_case, so the call changed nothing.
penguins %>%
  drop_na(island, body_mass_g) %>%
  ggplot(aes(x = species, y = body_mass_g, colour = island)) +
  geom_point(aes(size = body_mass_g)) +
  geom_line(linewidth = 2, color = "green") +
  theme_dark() +
  scale_color_manual(values = c("purple", "red", "yellow")) +
  labs(
    title = "Ugliest of them All",
    subtitle = "Worst plot colors ever",
    caption = "Source: Palmerpenguins, Tidytuesday"
  ) +
  theme(
    plot.title = element_text(color = "red", face = "bold", hjust = 0),
    plot.subtitle = element_text(color = "purple", face = "bold")
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2
3.4.0.
ℹ Please use `linewidth` instead.

Sources

Question 1

#ggridges citations=

#https://wilkelab.org/ggridges/articles/introduction.html

#https://wilkelab.org/ggridges/

#https://stackoverflow.com/questions/11857935/plot-the-average-values-for-each-level

#https://r-graph-gallery.com/267-reorder-a-variable-in-ggplot2.html

Question 2

#citations for how to remove the dollar sign, how to split columns into two, how to pivot long, filtering, summarizing by year and adjusting the labels for the legend=

#https://www.statology.org/remove-dollar-sign-in-r/

#https://www.statology.org/split-column-in-r/

#https://stackoverflow.com/questions/30808474/r-data-wrangling-for-emails

#https://www.statology.org/rename-single-column-in-r/

#R for Data Science chapter 5: https://r4ds.hadley.nz/data-tidy.html

#https://tidyr.tidyverse.org/reference/pivot_longer.html

#https://dplyr.tidyverse.org/reference/filter.html

#https://stackoverflow.com/questions/53808561/how-to-add-points-and-lines-in-ggplot-for-sums-by-years

#https://stackoverflow.com/questions/23635662/editing-legend-text-labels-in-ggplot

Question 3a

#citations for how to remove the dollar sign, how to split columns into two, how to pivot long, label left justify,formatting numbers, correcting the x-axis labels (thank you to my colleague @Wes Scott) =

#https://www.statology.org/remove-dollar-sign-in-r/

#https://www.statology.org/split-column-in-r/

#https://stackoverflow.com/questions/30808474/r-data-wrangling-for-emails

#https://www.statology.org/rename-single-column-in-r/

#R for Data Science chapter 5: https://r4ds.hadley.nz/data-tidy.html

#https://www.statology.org/ggplot-title-position/

#https://www.geeksforgeeks.org/change-formatting-of-numbers-of-ggplot2-plot-axis-in-r/

Question 3b

#citations for how to remove the dollar sign, how to split columns into two, how to pivot long, label left justify and scale_x_date, also my colleague @WesScott, formatting numbers. =

#https://www.r-bloggers.com/2011/08/use-geom_rect-to-add-recession-bars-to-your-time-series-plots-rstats-ggplot/

#https://stackoverflow.com/questions/78249312/adding-recession-bands-to-ggplot-of-federal-interest-rate-the-taylor-rule-and-y

#https://sergiocorreia.github.io/fedplot/reference/geom_recessions.html

#https://r4ds.hadley.nz/data-tidy.html

#https://www.statology.org/ggplot-title-position/

#https://stackoverflow.com/questions/11748384/formatting-dates-on-x-axis-in-ggplot2

#https://www.geeksforgeeks.org/change-formatting-of-numbers-of-ggplot2-plot-axis-in-r/

Question 3c

#citations for label left justify, open circle shape, formatting numbers =

#https://www.r-bloggers.com/2011/08/use-geom_rect-to-add-recession-bars-to-your-time-series-plots-rstats-ggplot/

#https://stackoverflow.com/questions/78249312/adding-recession-bands-to-ggplot-of-federal-interest-rate-the-taylor-rule-and-y #https://sergiocorreia.github.io/fedplot/reference/geom_recessions.html

#https://r4ds.hadley.nz/data-tidy.html

#https://www.statology.org/ggplot-title-position/

#https://rstudio.github.io/cheatsheets/html/data-visualization.html

#https://www.geeksforgeeks.org/change-formatting-of-numbers-of-ggplot2-plot-axis-in-r/

Question 4

#citation for how to draw layered donut charts = #https://www.geeksforgeeks.org/create-multiple-pie-charts-using-ggplot2-in-r/